/************************************************************************
Purpose: 	Adjust Phone Survey variables to fit our analysis			
*************************************************************************/

* Load the plant-level phone survey covariates (replaces whatever is in memory)
use "$PHONE_DATA_IN/Phone Survey All Covariates (Plant).dta", clear

* Set missing maintenance components to 0.
* missing() is used instead of a comparison with . so that extended missing
* values (.a-.z) are zeroed as well; otherwise they would propagate through
* the arithmetic below and leave the adjusted maintenance cost missing.
foreach cost_var of varlist pv_modification_cost bh_annu_maint_cost_lakh bh_cleaning_cost {
	replace `cost_var' = 0 if missing(`cost_var')
}

* Remove component which makes maintenance cost non additive; add present value of modification.
* bh_cleaning_cost is divided by 100000 before being netted out
* (presumably a rupee-to-lakh conversion; confirm units of pv_modification_cost too).
replace bh_annu_maint_cost_lakh = bh_annu_maint_cost_lakh - (bh_cleaning_cost/100000) + pv_modification_cost

* Round the adjusted cost to the nearest 0.001
replace bh_annu_maint_cost_lakh = round(bh_annu_maint_cost_lakh, 0.001)

********************************************************************************
* Boiler House Fuel Cost 
* this can be added back to any of the other iterations of total boiler cost
********************************************************************************

* Build boi_fuel_18_19_lakh and boi_fuel_19_20_lakh as the sum of the
* per-fuel cost components, after zeroing components that should not count.
foreach i in 18_19 19_20 {

	* Collect this year's fuel cost components once (in dataset order);
	* the list is reused for the summation at the end of the iteration.
	ds fuel_`i'_cost_*
	local fuel_vars `r(varlist)'

	foreach var of local fuel_vars {
		* Treat unreported components as zero so the total is well defined.
		* missing() also catches extended missing values (.a-.z), which a
		* comparison with . would skip and which would make the total missing.
		replace `var' = 0 if missing(`var')
		* NOTE(review): these temp_ placeholders are not used anywhere in the
		* visible code but are saved with the data; kept to preserve the
		* output variables. Confirm whether they can be removed.
		gen temp_`var' = .
	}

	* Zero a component when all of the listed indicators equal 0
	* (presumably fuel-use dummies; confirm against the survey codebook).
	replace fuel_`i'_cost_1 = 0 if imported_coal == 0 & imp_coal_lignite == 0 & imp_coal_indian == 0
	replace fuel_`i'_cost_2 = 0 if diesel == 0
	replace fuel_`i'_cost_3 = 0 if lignite == 0 & imp_coal_lignite == 0
	replace fuel_`i'_cost_4 = 0 //LDO is not used as boiler fuel
	replace fuel_`i'_cost_5 = 0 if imported_coal == 0 & imp_coal_lignite == 0 & imp_coal_indian == 0 & lignite == 0
	replace fuel_`i'_cost_6 = 0 //Natural Gas is not used as boiler fuel
	replace fuel_`i'_cost_7 = 0 if other_solid_fuel == 0 & bagasse == 0
	replace fuel_`i'_cost_8 = 0 if wood == 0
	replace fuel_`i'_cost_9 = 0 //bio diesel not boiler fuel

	* Sum the remaining components into the boiler fuel total
	gen boi_fuel_`i'_lakh = 0
	foreach var of local fuel_vars {
		replace boi_fuel_`i'_lakh = boi_fuel_`i'_lakh + `var'
	}
}

* A total is only meaningful when the overall fuel cost was reported;
* any kind of missing value in the reported total blanks the computed one.
replace boi_fuel_18_19_lakh = . if missing(fuel_cost_total_1819)
replace boi_fuel_19_20_lakh = . if missing(fuel_cost_total_1920)

label var boi_fuel_18_19_lakh "Boiler Fuel Cost 18-19 (INR Lakhs)"
label var boi_fuel_19_20_lakh "Boiler Fuel Cost 19-20 (INR Lakhs)"

********************************************************************************
* Labor Costs 
********************************************************************************

* A zero office or production worker cost is treated as not reported:
* recode zeros to missing for both totals.
foreach worker_cost of varlist tot_office_worker_cost tot_prod_worker_cost {
	replace `worker_cost' = . if `worker_cost' == 0
}

// reorganize BH costs

* Outlier correction: a boiler-helper salary of 120000 is recoded to 12000.
* The rule matches on the value itself, so every observation with that
* exact salary is affected.
local outlier_sal 120000
local corrected_sal 12000
replace c6_3_boi_helper_sal = `corrected_sal' if c6_3_boi_helper_sal == `outlier_sal'

* Flag plants that report any boiler-house labor information:
* d_bh_labor_costs is 1 when at least one c6_3_ variable is non-missing, else 0.
gen d_bh_labor_costs = 0
foreach var of varlist c6_3_* {
	replace d_bh_labor_costs = 1 if !missing(`var')
}

* Set missing values of BH engineer/masters costs to 0 (plausible they don't have any),
* as long as they have some BH labor costs; plants with no BH labor information
* get missing instead. missing() is used, consistent with the flag above, so that
* extended missing values (.a-.z) are zeroed too rather than left as missing.
foreach var of varlist bh_engineer_cost bh_an_master_cost {
	replace `var' = 0 if missing(`var') & d_bh_labor_costs==1
	replace `var' = . if d_bh_labor_costs==0
}

// replace bh_annu_labor_cost with clean c6_3_boi_[tech/oper/helper] costs
** these look like they have better overall estimates for the plant

* If a staff category has a head count but no salary, impute the median salary
* of that category (median taken over plants with some BH labor information).
* Missingness is tested with missing() so that extended missing values (.a-.z)
* are handled like system missing: an extended-missing salary is imputed, and
* an extended-missing head count is not mistaken for "has staff".
* NOTE(review): the median is computed over all non-missing salaries, including
* zeros, even though a zero salary is treated as not reported here; confirm
* whether zeros should be excluded from the median.
foreach var in techni opera helper {
	quietly summarize c6_3_boi_`var'_sal if d_bh_labor_costs==1, detail
	replace c6_3_boi_`var'_sal = `r(p50)' ///
		if (missing(c6_3_boi_`var'_sal) | c6_3_boi_`var'_sal == 0) ///
		& !missing(c6_3_boi_`var'_tot) & c6_3_boi_`var'_tot != 0
}

* Set values to 0 or missing, depending on if any costs reported or not:
* plants with no BH labor information get missing everywhere; for the others,
* remaining gaps are taken to be zero.
foreach var of varlist c6_3_boi_techni_* c6_3_boi_opera_* c6_3_boi_helper_* {
	replace `var' = . if d_bh_labor_costs==0
	replace `var' = 0 if missing(`var') & d_bh_labor_costs==1
}

* Rebuild annual BH labor cost from the monthly technician/operator/helper
* figures: head count times salary, times 12 months, summed over categories.
local months_per_year 12
local units_per_lakh 100000

replace bh_annu_labor_cost = 0
foreach role in techni opera helper {
	replace bh_annu_labor_cost = bh_annu_labor_cost + `months_per_year' * c6_3_boi_`role'_tot * c6_3_boi_`role'_sal
}

* Express the total in lakhs, then blank it for plants with no BH labor information
replace bh_annu_labor_cost = bh_annu_labor_cost/`units_per_lakh'
replace bh_annu_labor_cost = . if d_bh_labor_costs==0

* d_bh_labor_costs is not dropped (the drop below is disabled),
* so the flag is saved with the data.
*drop d_bh_labor_costs

// bh_annu_labor_cost now holds the c6_3_ versions from the accounts

* Write the analysis-ready dataset, overwriting any existing file
save "$PHONE_DATA_OUT/Phone Survey All Covariates (Plant)_Analysis.dta", replace
